---
title: "Calculate Exposure to PLGHA"
author: "Nina Brooks"
date: "6/8/2021"
output: html_document
editor_options: 
  chunk_output_type: console
---

# Set Up
```{r setup}
# Source the project's shared helper functions; here::here() resolves the
# path relative to the project root.
source(here::here("scripts/functions.R"))
# setup_plgha() is not defined in this file -- presumably it comes from
# scripts/functions.R and attaches the packages used by the chunks below
# (with tidylog logging switched on). TODO confirm against functions.R.
setup_plgha(tidylog = TRUE)
```

# Prepare population data

Uses World Bank World Development Indicators (WDI) data on yearly total population to calculate per capita aid. Data must be downloaded from https://datatopics.worldbank.org/world-development-indicators/. The file used here is available in data-raw (on Harvard Dataverse).

```{r population, echo=FALSE}

# Yearly total population from the World Bank WDI export, reshaped to one row
# per country-year with the columns `country`, `year` and `pop`.
wdi <- read_csv(
  here("data-raw/Other/44afdb6d-e7d2-4688-a70c-6dc201f5fbda_Data.csv")
) %>%
  # only the first nine rows of the export are used
  slice(1:9) %>%
  # keep the country name plus the per-year columns (names starting with "2")
  select(country = `Country Name`, starts_with("2")) %>%
  pivot_longer(
    cols = -country,
    names_to = "year",
    values_to = "pop"
  ) %>%
  mutate(
    # the year is the first four-digit run starting with 2 in the column name
    year = as.numeric(str_extract(year, "[2][0-9][0-9][0-9]")),
    # use the same DRC spelling that the IHME filters in this file rely on
    country = if_else(
      country == "Congo, Dem. Rep.",
      "Democratic Republic of the Congo",
      country
    )
  )

```

# Calculate exposure to PLGHA

## Calculate exposure with IHME data
Must download the IHME Development Assistance for Health Database from https://ghdx.healthdata.org/record/ihme-data/development-assistance-health-database-1990-2019. Note that if future versions are used, results will differ from this analysis.

```{r IHME, warning=FALSE}
# IHME Development Assistance for Health (DAH) records for the eight study
# countries, 2006-2018, summed to one row per
# year x source x channel x recipient country.
#
# The result stays grouped by year, source and channel (summarise() drops
# only the last grouping level), matching what this block produced before.
ihme <- here("data-raw/IHME/IHME_DAH_DATABASE_1990_2019_Y2020M04D23.csv") %>%
  read_csv() %>%
  filter(recipient_country %in% c("Ethiopia", "Nigeria", "Kenya", "Uganda",
                                  "Ghana", "Burkina Faso", "Niger",
                                  "Democratic Republic of the Congo")) %>%
  filter(year >= 2006 & year <= 2018) %>%
  filter(elim_ch != 1) %>% # drop double-counted channels per user guide
  # keep the identifiers plus the *_19 aid columns used downstream
  select(year, source, channel, recipient_country, dah_19, rmh_dah_19,
         rmh_fp_dah_19, nch_dah_19, ncd_dah_19, hiv_dah_19, mal_dah_19,
         tb_dah_19, swap_hss_total_dah_19, oid_dah_19, other_dah_19,
         unalloc_dah_19) %>%
  mutate(
    # coerce to numeric, then convert to dollars (units are thousands of
    # dollars); values that cannot be parsed become NA
    across(ends_with("19"), ~ as.numeric(.x) * 1000)
  ) %>%
  group_by(year, source, channel, recipient_country) %>%
  # extra aggregation due to 2 rows per country-year-source-channel (per
  # emails with IHME this additional aggregation step is needed); NAs are
  # ignored, so an all-NA group sums to 0
  summarise(
    across(ends_with("19"), ~ sum(.x, na.rm = TRUE)),
    .groups = "drop_last"
  )


# US bilateral aid (source "United_States", channel "BIL_USA") per recipient
# country and year, joined to WDI population to get per capita amounts.
#
# The result is an ungrouped tibble with the same columns as before:
# source, channel, year, country, dah_19, rmh_fp_dah_19, pop, aidpc, fprhpc.
ihme_us_to_ssa <- ihme %>%
  # `ihme` arrives grouped; drop the grouping so select() does not silently
  # re-add grouping columns and no grouping leaks into later steps
  ungroup() %>%
  filter(source == "United_States", channel == "BIL_USA") %>%
  select(source, channel, year, country = recipient_country,
         dah_19, rmh_fp_dah_19) %>%
  # explicit keys: one population value per country-year
  left_join(wdi, by = c("country", "year")) %>%
  mutate(
    aidpc = dah_19 / pop,          # total US health aid per capita
    fprhpc = rmh_fp_dah_19 / pop   # value of rmh_fp_dah_19 per capita
  )

# Country-level exposure measures: average per capita US aid over 2009-2016,
# split at the cross-country median into "High" / "Low".
exposurePMA_ihme <- ihme_us_to_ssa %>%
  filter(between(year, 2009, 2016)) %>% # Obama years
  group_by(country) %>%
  summarise(
    aidpc = mean(aidpc),
    fprhpc = mean(fprhpc)
  ) %>%
  ungroup() %>%
  # cross-country medians of the two per capita averages
  mutate(
    med_aid = median(aidpc),
    med_fprh = median(fprhpc)
  ) %>%
  # "High" only when strictly above the median; everything else, including
  # comparisons that evaluate to NA, falls through to "Low"
  mutate(
    exposure = case_when(
      aidpc > med_aid ~ "High",
      TRUE ~ "Low"
    ),
    ln_aidpc = log(aidpc),
    exposure_fprh = case_when(
      fprhpc > med_fprh ~ "High",
      TRUE ~ "Low"
    )
  )

# save the IHME-based exposure table
exposurePMA_ihme %>%
  write_rds(here("data-clean/exposurePMA.rds"))


```

## Calculate exposure as % of health spending that comes from the US
Must download the IHME Global Health Spending Database from https://ghdx.healthdata.org/record/ihme-data/global-health-spending-1995-2018. Note that if future versions are used, results will differ from this analysis.

```{r IHME Health Spending, warning=FALSE}
# IHME health spending for the eight study countries, 2009-2016, with WDI
# population attached and government health spending per capita recomputed.
health_sp <- read_csv(
  here("data-raw/IHME/IHME_HEALTH_SPENDING_1995_2018_Y2021M09D22.CSV")
) %>%
  filter(
    location_name %in% c("Ethiopia", "Nigeria", "Kenya", "Uganda",
                         "Ghana", "Burkina Faso", "Niger",
                         "Democratic Republic of the Congo"),
    between(year, 2009, 2016) # Obama years
  ) %>%
  dplyr::select(
    country = location_name, year,
    the_total_mean, ghes_total_mean, dah_total_mean,
    the_per_cap_mean, ghes_per_cap_mean, dah_per_cap_mean
  ) %>%
  mutate(
    # scale the three *_total_mean columns by 1000 (presumably the totals are
    # reported in thousands of dollars -- confirm against the IHME codebook)
    across(ends_with("_total_mean"), ~ .x * 1000)
  ) %>%
  # natural join; the columns shared with `wdi` are country and year
  left_join(wdi) %>%
  mutate(
    ghes_percap_calc = ghes_total_mean / pop
  )

```


## Combine all exposure measures into a single data frame
```{r all exposure}
# One row per country with every exposure measure:
#   * us_pct_ghes / med_ghes / exposure_ghes -- US bilateral health aid as a
#     percentage of ghes_total_mean, averaged over the years in `health_sp`,
#     and split at the cross-country median
#   * the aid-per-capita and FP/RH measures carried over from exposurePMA_ihme
exposure <- health_sp %>%
  left_join(
    # Bring in only the US aid total. Explicit keys keep the join from also
    # matching on `pop`, a floating-point column present in both tables.
    ihme_us_to_ssa %>%
      ungroup() %>%
      select(country, year, dah_19),
    by = c("country", "year")
  ) %>%
  mutate(
    us_pct_ghes = 100 * (dah_19 / ghes_total_mean)
  ) %>%
  group_by(country) %>%
  summarise(
    us_pct_ghes = mean(us_pct_ghes)
  ) %>%
  ungroup() %>%
  mutate(
    med_ghes = median(us_pct_ghes),
    # "High" only when strictly above the median; anything else (including a
    # missing value) is classified "Low"
    exposure_ghes = case_when(
      us_pct_ghes > med_ghes ~ "High",
      TRUE ~ "Low"
    )
  ) %>%
  left_join(
    exposurePMA_ihme %>%
      select(country, aidpc, med_aid, exposure, fprhpc,
             med_fprh, exposure_fprh, ln_aidpc),
    by = "country"
  )
  

# Convert the exposure indicators to factors with "Low" as the reference
# (first) level, attach variable labels, and save the result.
exposure %>%
  mutate(
    # Both steps must live in one function: passing a second lambda to
    # mutate_at() hands it to the first lambda as an extra argument, so the
    # relevel step was silently skipped and the level order depended on which
    # value happened to appear first in the data.
    across(
      starts_with("exposure"),
      ~ fct_relevel(as_factor(.x), "Low")
    )
  ) %>%
  labelled::set_variable_labels(
    # primary exposure measure based on GH aid
    aidpc = "Per capita global health assistance from the US",
    med_aid = "Median per capita global health assistance from the US",
    exposure = "Exposure: above median per capita global health assistance from the US",
    ln_aidpc = "Log per capita global health assistance from the US",

    # exposure based on FPRH aid
    fprhpc = "Per capita family planning & reproductive health assistance from the US",
    med_fprh = "Median per capita family planning & reproductive health assistance from the US",
    exposure_fprh = "Exposure: above median per capita FPRH assistance from the US",

    # exposure based on % of domestic health spending from US DAH
    us_pct_ghes = "Percent of government health expenditures from the US",
    med_ghes = "Median percent of government health expenditures from the US",
    exposure_ghes = "Exposure: above median % of government health expenditures from the US"
  ) %>%
  write_rds(here("data-clean/exposure.rds"))


```
